######################################################
#WA General 2014 Main Model
# When Women Run, Voters Will Follow (Sometimes): 
##Examining the Mobilizing Effect of Female Candidates in the 2014 and 2018 Midterm Elections 
#By Safarpour, Wyckoff Gaynor, Rouse, and Swers #
######################################################

# Start from an empty workspace: remove every visible (non-dot-prefixed)
# object from the current environment. Attached packages and options are
# not affected by this call.
rm(list = ls(all.names = FALSE))

#Packages.
library(arm)
library(plyr)
library(mvtnorm) 
library(stargazer)
library(stats)
library(Hmisc)
library(dplyr)

#setwd
# Move into the replication folder when it exists on this machine. The path
# below is the original author's; on any other computer the script keeps the
# current working directory (which must then contain the replication data)
# instead of aborting on a failed setwd().
replication_dir <- "/Users/ACS/Dropbox/When Women Run/Revision_PoliticalBehavior/R&R Part 2/Publication Docs/Replication Data and Code/"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Replication folder not found; using working directory: ", getwd())
}

#Data.
# Read the WA 2014 general-election file, keeping text columns as character.
WA <- read.csv("WA2014GeneralDataFinal.csv",
               header = TRUE, stringsAsFactors = FALSE)
# Print the column names as a quick check of what was loaded.
names(WA)

#Model.
# Logistic regression of 2014 turnout (voted2014) on the female-candidate
# indicator (womangeneral), registrant gender (female), three generation
# indicators (Baby Boomers are the omitted baseline), the interaction of
# womangeneral with each of those, and three controls: voted2012,
# obamavote2012 and totalreceipts.
#
# NOTE: the simulation code further down addresses coefficients by POSITION:
#   1 = intercept, 2 = womangeneral, 3 = female,
#   4-6 = Millennials2014 / GenerationX2014 / SilentGeneration2014,
#   7-9 = voted2012 / obamavote2012 / totalreceipts,
#   10-13 = womangeneral interacted with female / Millennials2014 /
#           GenerationX2014 / SilentGeneration2014.
# Adding, dropping or reordering terms here requires updating those indices
# and the covariate labels passed to stargazer().
m1<-glm(voted2014~ womangeneral+ female+ womangeneral*female+
          Millennials2014+ womangeneral*Millennials2014+
          GenerationX2014+ womangeneral*GenerationX2014+ 
          SilentGeneration2014+ womangeneral*SilentGeneration2014+ 
          voted2012+obamavote2012+totalreceipts, data=WA, family=binomial(link="logit"))
summary(m1)

# Fitted turnout probabilities (response scale) for the rows used in the fit;
# only summarised here as a sanity check.
preds.m1<- predict(m1, type="response")
summary(preds.m1) 

##Export model results to table for paper.
# Writes the regression table for m1 as HTML. Covariate labels are supplied
# positionally: the eight main effects first, then the four
# candidate-gender interactions.
stargazer(
  m1,
  type = "html",
  out = "WA2014GeneralCompetitivenessControl.html",
  title = "Effects of candidate gender, generation, gender, prior voting, county Obama vote share, and total money raised on 2014 Midterm Turnout in WA",
  dep.var.labels = "Voted in 2014 General Election",
  column.labels = "",
  covariate.labels = c(
    "Female Candidate",
    "Female",
    "Millennials",
    "Generation X",
    "Silent Generation",
    "Voted in 2012 General Election",
    "County Obama Vote 2012",
    "Total Money Raised",
    "Female Candidate*Female",
    "Female Candidate*Millennial",
    "Female Candidate*Generation X",
    "Female Candidate*Silent Generation"
  ),
  digits = 3,
  single.row = TRUE,
  notes = "Results from logistic regression, standard errors in parentheses. Baseline age category: Baby Boomers.",
  notes.append = TRUE,
  notes.align = "l"
)


########################################
## Simulate CIs around predicted effects
########################################
# Number of simulated coefficient vectors, and a fixed seed so the draws
# (and every interval derived from them) are reproducible.
n_draws <- 1000
set.seed(1714)

# Draw n_draws coefficient vectors from a multivariate normal centred on the
# m1 estimates with the estimated covariance matrix. The estimates and
# covariance are passed straight into rmvnorm() rather than being stored in
# objects called `coef` and `vcov`, which would mask the stats functions of
# the same name for the rest of the session.
sim_coefs_m1 <- rmvnorm(n_draws, mean = coef(m1), sigma = vcov(m1))

# Check: the column means of the draws should be close to the point estimates.
rbind(coef(m1), apply(sim_coefs_m1, 2, mean))

#Create regression sample.
# Keep only the rows of WA with no missing value on the outcome or on any
# predictor that enters m1.
sample <- WA[complete.cases(WA[, c("voted2014", "womangeneral", "female",
                                   "Millennials2014", "GenerationX2014",
                                   "SilentGeneration2014",
                                   "voted2012", "obamavote2012",
                                   "totalreceipts")]), ]

#Generate Predictions for male only candidates.
# For every simulated coefficient vector: set the female-candidate indicator
# to 0 for all registrants in the regression sample, turn the linear predictor
# into a probability, and average over registrants. Coefficients are addressed
# by position (1 = intercept ... 13 = last interaction).
ppwomen0.s <- vapply(seq_len(n_draws), function(draw) {
  b <- sim_coefs_m1[draw, ]
  eta <- b[[1]] +
    b[[2]] * 0 +
    b[[3]] * sample$female +
    b[[4]] * sample$Millennials2014 +
    b[[5]] * sample$GenerationX2014 +
    b[[6]] * sample$SilentGeneration2014 +
    b[[7]] * sample$voted2012 +
    b[[8]] * sample$obamavote2012 +
    b[[9]] * sample$totalreceipts +
    b[[10]] * sample$female * 0 +
    b[[11]] * 0 * sample$Millennials2014 +
    b[[12]] * 0 * sample$GenerationX2014 +
    b[[13]] * 0 * sample$SilentGeneration2014
  mean(invlogit(eta))
}, numeric(1))
# Average simulated turnout with only male candidates.
mean(ppwomen0.s)

#Generate Predictions for female only candidates.
# Same calculation as for the male-only scenario, but with the
# female-candidate indicator set to 1 for every registrant, so the main
# effect and all four candidate-gender interactions are switched on.
ppwomen1.s <- vapply(seq_len(n_draws), function(draw) {
  b <- sim_coefs_m1[draw, ]
  eta <- b[[1]] +
    b[[2]] * 1 +
    b[[3]] * sample$female +
    b[[4]] * sample$Millennials2014 +
    b[[5]] * sample$GenerationX2014 +
    b[[6]] * sample$SilentGeneration2014 +
    b[[7]] * sample$voted2012 +
    b[[8]] * sample$obamavote2012 +
    b[[9]] * sample$totalreceipts +
    b[[10]] * sample$female * 1 +
    b[[11]] * 1 * sample$Millennials2014 +
    b[[12]] * 1 * sample$GenerationX2014 +
    b[[13]] * 1 * sample$SilentGeneration2014
  mean(invlogit(eta))
}, numeric(1))
# Average simulated turnout with a female candidate.
mean(ppwomen1.s)

# Draw-by-draw difference in average predicted turnout:
# female candidate minus male-only candidates.
effect.01 <- ppwomen1.s - ppwomen0.s
summary(effect.01)

# Collect the two scenarios and their difference, then summarise each.
elements <- list(ppwomen0.s, ppwomen1.s, effect.01)
lapply(elements, summary)

# 2.5% and 97.5% quantiles of each element, one row per element.
ci2 <- do.call(rbind, lapply(elements, quantile, probs = c(0.025, 0.975)))

# 3 x 3 table: lower bound, mean, upper bound (columns) for each element (rows).
results <- cbind(ci2[, 1], lapply(elements, mean), ci2[, 2])
dimnames(results) <- list(
  c("Only Male Candidates", "Female Candidate", "Effect Male only-Female Candidate"),
  c("2.5", "Mean", "97.5")
)
results

# Extract the overall female-candidate effect and its 95% interval from the
# `results` table (row "Effect Male only-Female Candidate").
# Cells are addressed by row/column name rather than by linear position:
# `results` is stored column-major, so linear element 3 is the 2.5% bound and
# element 9 is the 97.5% bound. The previous code assigned these the other way
# round, which swapped the lower and upper bounds of the overall effect.
meaneffect <- results[["Effect Male only-Female Candidate", "Mean"]]
lowerci <- results[["Effect Male only-Female Candidate", "2.5"]]
upperci <- results[["Effect Male only-Female Candidate", "97.5"]]

##Get Predictions by Generation.
# Placeholders for Gen. Z: nothing in this section fills them.
w0.GZ <- NULL
w1.GZ <- NULL

# Registrants in the regression sample, split by generation indicator.
SG <- subset(sample, SilentGeneration2014 == 1)
BB <- subset(sample, BabyBoomers2014 == 1)
GX <- subset(sample, GenerationX2014 == 1)
M <- subset(sample, Millennials2014 == 1)

# For each simulated coefficient vector, average predicted turnout among the
# rows of `voters` when the female-candidate indicator is fixed at `woman`
# (0 or 1). Coefficients are addressed by position, in the same order as the
# terms of m1. Returns one value per draw.
sim_mean_turnout <- function(voters, woman) {
  vapply(seq_len(n_draws), function(draw) {
    b <- sim_coefs_m1[draw, ]
    mean(invlogit(
      b[[1]] +
        b[[2]] * woman +
        b[[3]] * voters$female +
        b[[4]] * voters$Millennials2014 +
        b[[5]] * voters$GenerationX2014 +
        b[[6]] * voters$SilentGeneration2014 +
        b[[7]] * voters$voted2012 +
        b[[8]] * voters$obamavote2012 +
        b[[9]] * voters$totalreceipts +
        b[[10]] * voters$female * woman +
        b[[11]] * woman * voters$Millennials2014 +
        b[[12]] * woman * voters$GenerationX2014 +
        b[[13]] * woman * voters$SilentGeneration2014
    ))
  }, numeric(1))
}

# w0.* = male-only candidates, w1.* = female candidate.
w0.SG <- sim_mean_turnout(SG, 0)
w1.SG <- sim_mean_turnout(SG, 1)
w0.BB <- sim_mean_turnout(BB, 0)
w1.BB <- sim_mean_turnout(BB, 1)
w0.GX <- sim_mean_turnout(GX, 0)
w1.GX <- sim_mean_turnout(GX, 1)
w0.M <- sim_mean_turnout(M, 0)
w1.M <- sim_mean_turnout(M, 1)

# Female-candidate effect within each generation: 95% interval, then mean.
effect.SG <- w1.SG - w0.SG
quantile(effect.SG, probs = c(0.025, 0.975))
mean(effect.SG)

effect.BB <- w1.BB - w0.BB
quantile(effect.BB, probs = c(0.025, 0.975))
mean(effect.BB)

effect.GX <- w1.GX - w0.GX
quantile(effect.GX, probs = c(0.025, 0.975))
mean(effect.GX)

effect.M <- w1.M - w0.M
quantile(effect.M, probs = c(0.025, 0.975))
mean(effect.M)

#Effect by gender.
# Registrants in the regression sample, split by registrant gender.
Men <- subset(sample, female == 0)
Women <- subset(sample, female == 1)

# For each simulated coefficient vector, average predicted turnout among the
# rows of `voters` when the female-candidate indicator is fixed at `woman`
# (0 or 1). Coefficients are addressed by position, in the same order as the
# terms of m1. Returns one value per draw.
sim_mean_turnout <- function(voters, woman) {
  vapply(seq_len(n_draws), function(draw) {
    b <- sim_coefs_m1[draw, ]
    mean(invlogit(
      b[[1]] +
        b[[2]] * woman +
        b[[3]] * voters$female +
        b[[4]] * voters$Millennials2014 +
        b[[5]] * voters$GenerationX2014 +
        b[[6]] * voters$SilentGeneration2014 +
        b[[7]] * voters$voted2012 +
        b[[8]] * voters$obamavote2012 +
        b[[9]] * voters$totalreceipts +
        b[[10]] * voters$female * woman +
        b[[11]] * woman * voters$Millennials2014 +
        b[[12]] * woman * voters$GenerationX2014 +
        b[[13]] * woman * voters$SilentGeneration2014
    ))
  }, numeric(1))
}

# Women: male-only candidates (w0.F) vs. female candidate (w1.F).
w0.F <- sim_mean_turnout(Women, 0)
w1.F <- sim_mean_turnout(Women, 1)

# Men. NOTE: w0.M / w1.M are the same names the generation section uses for
# Millennials; from here on they hold the simulations for men. effect.M
# (Millennials) was computed before this point and is not affected.
w0.M <- sim_mean_turnout(Men, 0)
w1.M <- sim_mean_turnout(Men, 1)

# Female-candidate effect among men: 95% interval, then mean.
effect.Men <- w1.M - w0.M
quantile(effect.Men, probs = c(0.025, 0.975))
mean(effect.Men)

# Female-candidate effect among women: 95% interval, then mean.
effect.Women <- w1.F - w0.F
quantile(effect.Women, probs = c(0.025, 0.975))
mean(effect.Women)

#Store the Effects.
# One row per registrant subgroup, in this order: overall, women, men,
# Gen. Z (left as NA), Millennials, Gen. X, Baby Boomers, Silent Generation.
Election <- rep("2014 General", 8)

Effect <- c(
  meaneffect,
  mean(effect.Women),
  mean(effect.Men),
  NA,
  mean(effect.M),
  mean(effect.GX),
  mean(effect.BB),
  mean(effect.SG)
)

LowerCI <- c(
  lowerci,
  quantile(effect.Women, 0.025),
  quantile(effect.Men, 0.025),
  NA,
  quantile(effect.M, 0.025),
  quantile(effect.GX, 0.025),
  quantile(effect.BB, 0.025),
  quantile(effect.SG, 0.025)
)

UpperCI <- c(
  upperci,
  quantile(effect.Women, 0.975),
  quantile(effect.Men, 0.975),
  NA,
  quantile(effect.M, 0.975),
  quantile(effect.GX, 0.975),
  quantile(effect.BB, 0.975),
  quantile(effect.SG, 0.975)
)

WAgeneral <- data.frame(Effect, UpperCI, LowerCI, Election)

# Ordered factor labelling each row with its subgroup.
WAgeneral$`Registrant Subgroup` <- factor(
  1:8,
  levels = 1:8,
  labels = c("Overall", "Women", "Men", "Gen. Z", "Millennials", "Gen. X", "Baby\nBoomers", "Silent Gen."),
  ordered = TRUE
)

#write effects to data frame.
write.csv(WAgeneral, "WA2014GeneralEffectsMainModel.csv", row.names = FALSE)

